#### Paper title: Introducing the MAVERICK dataset
#### Purpose: Data presentation and visualization for main article
#### Author: Sebastian van Baalen & Kristine Höglund
#### Last updated: 2025-10-14

#### Front matter ####

# Install packages

# Uncomment to install packages

# install.packages("here") # To use relative paths
# install.packages("devtools") # To install packages from Github
# install.packages("eventreport") # To load and aggregate the MAVERICK data
# install.packages("tidyverse") # To work with tidy data
# install.packages("ggplot2") # To visualize data
# install.packages("wesanderson") # To use the Wes Anderson color palettes
# install.packages("lubridate") # To work with date variables
# install.packages("readxl") # To load data from Excel
# install.packages("sf") # To work with spatial data
# install.packages("scales") # To adapt plot scales
# install.packages("kableExtra") # To export tables
# install.packages("ggrepel") # To repel overlapping text labels in plots

# Install the vdemdata package from Github

# devtools::install_github("vdeminstitute/vdemdata") # To load the V-Dem data

# Install the eventreport package from Github

#devtools::install_github("sebastianvanbaalen/eventreport") # To load and aggregate the MAVERICK data

# Load packages

# Attach every package used by the script. Warnings, messages and package
# start-up messages are all silenced here, so masking or version notices
# raised while attaching will not be shown.
suppressWarnings(suppressMessages(suppressPackageStartupMessages({
  library(here) # To use relative paths
  library(devtools) # To install packages from Github
  library(eventreport) # To load and aggregate the MAVERICK data
  library(tidyverse) # To work with tidy data
  library(ggplot2) # To visualize data
  library(wesanderson) # To use the Wes Anderson color palettes
  library(lubridate) # To work with date variables
  library(readxl) # To load data from Excel
  library(sf) # To work with spatial data
  library(scales) # To adapt plot scales
  library(kableExtra) # To export tables
  library(ggrepel) # To repel overlapping text labels in plots
  library(vdemdata) # To load the V-Dem data
})))

# Define custom plot theme

# Shared theme for the article figures: black-and-white base, large type,
# legend below the panel, white facet strips with a black border, no grid.
my_theme <- theme_bw() +
  theme(
    # Page margin and panel grid
    plot.margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "cm"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axis titles (spaced away from the panel) and tick labels
    axis.title.x = element_text(size = 20, margin = margin(10, 0, 0, 0)),
    axis.title.y = element_text(size = 20, margin = margin(0, 10, 0, 0)),
    axis.title.y.right = element_text(size = 20, margin = margin(0, 0, 0, 10)),
    axis.text.x = element_text(size = 18),
    axis.text.y = element_text(size = 18),
    # Legend
    legend.position = "bottom",
    legend.title = element_text(size = 18),
    legend.text = element_text(size = 18),
    # Facet strips
    strip.text = element_text(size = 18),
    strip.background = element_rect(fill = "white", color = "black"),
    # Title, subtitle and caption
    plot.title = element_text(
      size = 20, face = "bold", hjust = 0, margin = margin(0, 0, 10, 0)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0, margin = margin(0, 0, 40, 0)
    ),
    plot.caption = element_text(
      size = 16, hjust = 1, margin = margin(20, 0, 0, 0)
    )
  )

#### Load the most-representative MAVERICK data using the eventreport R package ####

# Build the data with eventreport's aggregate_maverick_rep(), then relabel
# "Ivory Coast" as "Côte d'Ivoire"; every other country value is kept as is.
maverick_rep <- aggregate_maverick_rep()
maverick_rep <- maverick_rep %>%
  mutate(
    country = if_else(country == "Ivory Coast", "Côte d'Ivoire", country)
  )

#### Load other datasets ####

# DECO is read from CSV with the base reader's defaults.
deco <- utils::read.csv(file = "DECO_v.1.0.csv")
# Every ECAV column is read as text; conversions happen where the data is used.
ecav <- readxl::read_excel(
  path = "ECAV datatset_Version 1.2.xls",
  col_types = "text"
)

#### Statistics cited in the text ####

##### Mean number of inferential clues per event report ####

round(mean(maverick_rep[["certain"]]), digits = 2)

##### Share of event reports coded based on a single clue ####

# Percentage of rows whose `certain` equals 1. `%in%` counts a missing
# `certain` as "not a single clue", the same as a 0/1 flag with a 0 default.
single_clue_events <- mean(maverick_rep[["certain"]] %in% 1) * 100

round(single_clue_events, digits = 0)

##### Average number of event reports by event ####

round(mean(maverick_rep[["number_of_sources"]]), digits = 2)

##### Max number of event reports by event ####

max(maverick_rep[["number_of_sources"]])

##### Number of event reports ####

# NOTE(review): `maverick_event_report` is not created in this script;
# presumably it is provided by the eventreport package — confirm.
nrow(maverick_event_report)

##### Share of event reports sampled in Factiva ####

# Rows with sampling == 0 as a whole-number percentage of all rows
with(
  maverick_event_report,
  round((sum(sampling == 0) / length(sampling)) * 100, 0)
)

##### Share of event reports sampled in pre-selected secondary sources ####

# Rows with sampling == 1 as a whole-number percentage of all rows
with(
  maverick_event_report,
  round((sum(sampling == 1) / length(sampling)) * 100, 0)
)

##### Number of elections ####

# Distinct values of `election` (a missing value counts as one value)
n_distinct(maverick_rep$election)

##### Number of events ####

nrow(maverick_rep)

##### Number of deaths ####

# na.rm = TRUE matches every other sum of this column in the script; without
# it a single missing estimate would turn the reported total into NA.
sum(maverick_rep$deaths_best, na.rm = TRUE)

##### Number of injuries ####

sum(maverick_rep$injuries_best, na.rm = TRUE)

##### Number of unique actors ####

# Stack the six actor-name columns and count the distinct non-missing names
maverick_rep %>%
  select(event_id, all_of(paste0("actor", 1:6))) %>%
  pivot_longer(
    cols = -event_id,
    names_to = "actor",
    values_to = "actor_name"
  ) %>%
  filter(!is.na(actor_name)) %>%
  summarize(n = n_distinct(actor_name)) %>%
  pull(n)

#### Figures and tables in the main text ####

##### Figure 2: Share of actor records by actor subtype and country ####

# Reshape to one row per actor slot, pool "Other", "Indeterminate" and empty
# subtypes into "Other/Unknown", compute each subtype's share of a country's
# actor records, and draw the shares as a lollipop chart faceted by country.
# NOTE(review): the y scale is limited to 0-50%, so a share (or a label drawn
# at share + 3 points) above that would fall outside the scale — confirm no
# country/subtype combination exceeds it.
maverick_rep %>%
  select(event_id, country, all_of(paste0("actor", 1:6, "_subtype"))) %>%
  pivot_longer(
    cols = -c(event_id, country),
    names_to = "actor",
    values_to = "actor_subtype"
  ) %>%
  filter(!is.na(actor_subtype)) %>%
  mutate(
    actor_subtype = if_else(
      actor_subtype %in% c("Other", "Indeterminate", ""),
      "Other/Unknown",
      actor_subtype
    )
  ) %>%
  count(actor_subtype, country, name = "count") %>%
  group_by(country) %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup() %>%
  ggplot(aes(x = actor_subtype)) +
  geom_segment(aes(xend = actor_subtype, y = 0, yend = proportion)) +
  geom_point(aes(y = proportion), size = 3) +
  facet_wrap(~ country) +
  geom_text(
    aes(
      y = proportion + 0.03,
      label = paste0(round(proportion*100,0), "%")
    ),
    size = 4,
    position = position_dodge(width = 0.9)
  ) +
  scale_y_continuous(
    labels = scales::percent_format(),
    breaks = seq(0, 0.5, 0.1),
    limits = c(0, 0.5)
  ) +
  coord_flip() +
  labs(
    x = NULL,
    y = "Share of actor records",
    caption = "Note: Based on the most-representative aggregation set."
  ) +
  guides(color = "none") +
  my_theme +
  theme(axis.text.y = element_text(size = 12))

# Saves the most recently created plot
ggsave(filename = "actor_subtype.pdf", width = 13, height = 7, dpi = 600)

##### Figure 3: Number of deaths and injured people by location ####

# Load shapefiles

# Available at: https://data.humdata.org/dataset/geoboundaries-admin-boundaries-for-cote-d-ivoire

# Côte d'Ivoire admin-1 boundaries
shapefile_civ <- sf::st_read(
  dsn = "civ_admbnda_adm1_cntig_ocha_itos_20180706/civ_admbnda_adm1_cntig_ocha_itos_20180706.shp"
)

# Available at: https://data.humdata.org/dataset/geoboundaries-admin-boundaries-for-kenya

# Kenya admin-1 boundaries
shapefile_ke <- sf::st_read(
  dsn = "geoBoundaries-KEN-ADM1-all/geoBoundaries-KEN-ADM1.shp"
)

# Define jittering function to jitter points inside the polygons

#' Jitter point coordinates while keeping the points inside a polygon
#'
#' Each point is moved by an independent uniform offset in
#' `[-amount, amount]` on both axes, always measured from its ORIGINAL
#' position. A point is fixed as soon as one of its candidates falls inside
#' the polygon; only points that are still unplaced are retried. Points for
#' which no candidate lands inside after `max_attempts` tries keep their
#' original coordinates.
#'
#' (Previously every retry re-jittered all points starting from their already
#' updated coordinates, so placed points kept drifting until one attempt
#' happened to put every point inside at once.)
#'
#' @param data Data frame holding the coordinate columns (no missing values).
#' @param lat_col,lon_col Names of the latitude and longitude columns,
#'   unquoted or as strings.
#' @param polygon An sf object; all of its geometries are merged with
#'   `st_union()` before testing. Candidate points are built with
#'   `crs = 4326`, so the polygon is expected to share that CRS.
#' @param amount Maximum absolute offset per axis, in coordinate units.
#' @param max_attempts Maximum number of candidate draws per point.
#' @return `data` with the two coordinate columns replaced by jittered values.
jitter_within <- function(data, lat_col, lon_col, polygon, amount = 0.1, max_attempts = 20) {
  lat_name <- rlang::as_name(rlang::ensym(lat_col))
  lon_name <- rlang::as_name(rlang::ensym(lon_col))

  orig_lon <- data[[lon_name]]
  orig_lat <- data[[lat_name]]
  new_lon <- orig_lon
  new_lat <- orig_lat

  # The merged region does not change between attempts, so build it once
  region <- sf::st_union(polygon)

  # Row indices that have not yet received an accepted jittered position
  pending <- seq_len(nrow(data))

  for (attempt in seq_len(max_attempts)) {
    if (length(pending) == 0) break

    k <- length(pending)
    candidates <- data.frame(
      lon_j = orig_lon[pending] + stats::runif(k, -amount, amount),
      lat_j = orig_lat[pending] + stats::runif(k, -amount, amount)
    )

    pts <- sf::st_as_sf(candidates, coords = c("lon_j", "lat_j"), crs = 4326)
    inside <- sf::st_within(pts, region, sparse = FALSE)[, 1]

    accepted <- pending[inside]
    new_lon[accepted] <- candidates$lon_j[inside]
    new_lat[accepted] <- candidates$lat_j[inside]

    pending <- pending[!inside]
  }

  data[[lon_name]] <- new_lon
  data[[lat_name]] <- new_lat
  data
}

# Jitter points

# For each country: keep events with geo_precision above 2 and non-missing
# coordinates, sum deaths and injuries per coordinate pair, jitter the
# resulting points inside the country polygon, and tag the source dataset.
maverick_civ <- maverick_rep %>%
  filter(
    country == "Côte d'Ivoire",
    geo_precision > 2,
    !is.na(latitude),
    !is.na(longitude)
  ) %>%
  group_by(latitude, longitude) %>%
  summarize(
    across(c(deaths_best, injuries_best), ~ sum(.x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  jitter_within(
    lat_col = latitude,
    lon_col = longitude,
    polygon = shapefile_civ,
    amount = 0.1
  ) %>%
  mutate(dataset = "MAVERICK")

maverick_ke <- maverick_rep %>%
  filter(
    country == "Kenya",
    geo_precision > 2,
    !is.na(latitude),
    !is.na(longitude)
  ) %>%
  group_by(latitude, longitude) %>%
  summarize(
    across(c(deaths_best, injuries_best), ~ sum(.x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  jitter_within(
    lat_col = latitude,
    lon_col = longitude,
    polygon = shapefile_ke,
    amount = 0.1
  ) %>%
  mutate(dataset = "MAVERICK")

# Make Côte d'Ivoire map

# Draw one casualty map and save it as a PDF.
#
# locations: data frame with longitude, latitude, deaths_best, injuries_best
# shapefile: sf object drawn as the base map
# xlim, ylim: longitude/latitude window passed to coord_sf()
# file:      output file name
#
# Returns the plot, so a top-level call still displays it as before.
# The color legend is configured once in guides(); the scale-level guide the
# two copies of this code used to carry was overridden by it and is dropped.
plot_casualty_map <- function(locations, shapefile, xlim, ylim, file) {
  casualty_map <- locations %>%
    pivot_longer(
      cols = c(deaths_best, injuries_best),
      names_to = "type",
      values_to = "counts"
    ) %>%
    # Injuries first, so the manual color labels below line up with the levels
    mutate(type = factor(type, levels = c("injuries_best", "deaths_best"))) %>%
    ggplot() +
    geom_sf(data = shapefile, linewidth = 0.15, fill = "grey97") +
    coord_sf(xlim = xlim, ylim = ylim, expand = FALSE) +
    geom_point(
      aes(x = longitude, y = latitude, size = counts, color = type),
      shape = 19, alpha = 0.7
    ) +
    scale_color_manual(
      values = wesanderson::wes_palette("Zissou1", 2, type = "continuous"),
      labels = c("Injuries", "Deaths")
    ) +
    scale_size(
      name = "Number of dead/injured",
      range = c(1, 15),
      breaks = c(50, 100, 150, 200)
    ) +
    theme_void() +
    labs(
      color = "Type of victims",
      caption = "Note: Based on the most-representative aggregation set."
    ) +
    guides(
      color = guide_legend(order = 1, override.aes = list(size = 7)),
      size = guide_bins(order = 2)
    ) +
    theme(
      legend.spacing = unit(2, "lines"),
      plot.caption = element_text(size = 18, hjust = 1, margin = margin(t = 0, r = 0, b = 0, l = 0)),
      legend.title = element_text(size = 18),
      legend.text = element_text(size = 18)
    )

  # Pass the plot explicitly instead of relying on the last plot created
  ggsave(file, plot = casualty_map, width = 13, height = 11, dpi = 600)

  casualty_map
}

# Make Côte d'Ivoire map

plot_casualty_map(
  maverick_civ, shapefile_civ,
  xlim = c(-9, -2), ylim = c(4, 11),
  file = "map_civ.pdf"
)

# Make Kenya map

plot_casualty_map(
  maverick_ke, shapefile_ke,
  xlim = c(33, 43), ylim = c(-5, 6),
  file = "map_ken.pdf"
)

##### Figure 4: Share of violent interactions by type of violence used and actor subtype ####

# Make long dataset

# One row per event and actor slot, carrying the slot's subtype. The
# "_subtype" suffix is stripped so `actor` holds the bare slot name.
maverick_actors1 <- maverick_rep %>%
  select(event_id, country, all_of(paste0("actor", 1:6, "_subtype"))) %>%
  pivot_longer(
    cols = -c(event_id, country),
    names_to = "actor",
    values_to = "actor_subtype"
  ) %>%
  mutate(actor = str_remove(actor, "_subtype"))

# Same shape for the slot's violence string
maverick_actors2 <- maverick_rep %>%
  select(event_id, country, all_of(paste0("actor", 1:6, "_violence"))) %>%
  pivot_longer(
    cols = -c(event_id, country),
    names_to = "actor",
    values_to = "actor_violence"
  ) %>%
  mutate(actor = str_remove(actor, "_violence"))

# Pair subtype with violence per event/slot/country and keep only the two
# police subtypes
police_subtypes <- c(
  "Security forces: Police",
  "Security forces: Paramilitary police"
)

maverick_actors <- inner_join(
  maverick_actors1,
  maverick_actors2,
  by = c("event_id", "actor", "country")
) %>%
  filter(actor_subtype %in% police_subtypes)

# Split the actor_violence column into separate words and get unique values

# Collect the distinct violence labels that occur in `actor_violence`:
# whitespace after each ";" is removed, each value is split on ";", and the
# pieces are flattened and de-duplicated. Empty strings are dropped by the
# last step; a missing (NA) entry is NOT dropped by it, because comparing NA
# with "" yields NA and indexing with NA keeps an NA element.
violence_types <- maverick_actors %>%
  mutate(actor_violence = str_replace_all(actor_violence, ";\\s+", ";")) %>% 
  pull(actor_violence) %>%
  str_split(";") %>%
  unlist() %>%
  unique() %>%
  .[. != ""]

# Create a column for each unique violence type and set the value to 1 or 0

# Columns are appended in the order the labels were first encountered, and
# each is named after its label. Rows whose `actor_violence` is missing get
# NA rather than 0.
# NOTE(review): fixed() matches substrings, so a label that is contained in
# another label would be flagged for both — confirm no labels are nested.
for (violence in violence_types) {
  maverick_actors <- maverick_actors %>%
    mutate(!!violence := if_else(str_detect(actor_violence, fixed(violence)), 1, 0))
}

# Make plot 

# For every police subtype, country and violence type: the number of actor
# rows flagged with that type divided by the number of actor rows of that
# subtype in that country, drawn as dodged bars faceted by country.
# The indicator columns are taken as the range Shooting:Stabbing, i.e. they
# depend on the order in which the indicator columns were created.
maverick_actors %>%
  select(event_id, country, actor, actor_subtype, Shooting:Stabbing) %>%
  pivot_longer(
    cols = -c(event_id, country, actor, actor_subtype),
    names_to = "violence_type",
    values_to = "violence"
  ) %>%
  group_by(actor_subtype, violence_type, country) %>%
  summarize(total_violence = sum(violence, na.rm = TRUE), .groups = "drop") %>%
  left_join(
    # Denominator: actor rows per subtype and country
    count(maverick_actors, actor_subtype, country, name = "total_events"),
    by = c("actor_subtype", "country")
  ) %>%
  mutate(
    proportion = total_violence / total_events,
    violence_type = recode(
      violence_type,
      "Indeterminate" = "Unknown",
      "Melee" = "Beating"
    ),
    # Only the two police subtypes are present at this point
    actor_subtype = if_else(
      actor_subtype == "Security forces: Police",
      "Police",
      "Paramilitary police"
    )
  ) %>%
  filter(violence_type != "NA") %>%
  ggplot(aes(x = violence_type)) +
  geom_col(
    aes(y = proportion, fill = actor_subtype),
    position = "dodge"
  ) +
  geom_text(
    aes(
      y = proportion + 0.02,
      label = paste0(round(proportion*100,0), "%"),
      color = actor_subtype
    ),
    size = 4,
    fontface = "bold",
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(
    values = rev(wes_palette("Zissou1", 2, type = "continuous")),
    name = NULL
  ) +
  scale_color_manual(
    values = rev(wes_palette("Zissou1", 2, type = "continuous"))
  ) +
  scale_y_continuous(labels = percent, breaks = seq(0, 1, by = 0.1)) +
  guides(color = "none") +
  coord_flip() +
  facet_wrap(~ country) +
  labs(
    x = NULL,
    y = "Share of actor involvements",
    caption = "Note: Based on the most-representative aggregation set."
  ) +
  my_theme +
  theme(
    legend.position = "top",
    legend.justification = c(0, 1),
    legend.box.just = "left",
    legend.margin = margin(0, 0, 0, 0)
  )

# Saves the most recently created plot
ggsave(
  filename = "violence_repertoires.pdf",
  width = 13, height = 8.9, dpi = 600
)

#### Appendix A: Situating the cases ####

##### Table AI: Number of deaths by country ####

# Total DECO deaths (`best`) per country, top 20 by total. The two case
# countries are wrapped in \textbf{} for the LaTeX table and the totals are
# formatted with a thousands separator (which turns them into strings).
top_countries <- deco %>%
  group_by(country) %>%
  summarize(deaths = sum(best)) %>%
  arrange(desc(deaths)) %>%
  slice_max(order_by = deaths, n = 20) %>%
  mutate(
    country = recode(
      country,
      "Ivory Coast" = "\\textbf{Côte d'Ivoire}",
      "Kenya" = "\\textbf{Kenya}"
    ),
    deaths = format(deaths, big.mark = ",")
  )

# escape = FALSE lets the \textbf{} markup through to the .tex file
top_countries %>%
  kable(
    format = "latex",
    booktabs = TRUE,
    col.names = c("Country", "Number of deaths"),
    caption = "20 most electoral violence-affected countries 1989--2017",
    label = "deaths-deco",
    position = "h!",
    align = "ll",
    escape = FALSE
  ) %>%
  kable_styling(latex_options = "scale_down", font_size = 10) %>%
  footnote(
    general = "Based on data from DECO (Fjelde & Höglund, 2022).",
    threeparttable = TRUE
  ) %>%
  save_kable(file = "table_a1.tex")

# Load V-DEM data

# V-Dem country-years from 1992 to 2022 in which v2xel_elecpres or
# v2xel_elecparl equals 1, reduced to the variables plotted in Appendix A and
# prepared for the boxplots:
# - v2x_regime codes 0-3 become regime labels (any other code becomes NA);
# - v2elpeace and v2elintim are sign-flipped, so larger values read as "more"
#   on the plots' intensity axes;
# - color/size/order mark Côte d'Ivoire and Kenya; sorting by `order` puts
#   their rows last in the data;
# - year is kept only for the two case countries;
# - executive_constraints is the product of judicial and legislative
#   constraints;
# - the five party variables are standardized over all retained rows and
#   averaged into party_strength.
vdem <- vdemdata::vdem %>%
  filter(year > 1991 & year < 2023 & (v2xel_elecpres == 1 | v2xel_elecparl == 1)) %>%
  select(
    country_name, country_text_id, year, v2elpeace, v2elintim, v2x_regime,
    v2xnp_client, v2x_jucon, v2xlg_legcon, v2psorgs, v2psprbrch, v2psprlnks,
    v2pscnslnl, v2pscohesv, v2cacamps
  ) %>%
  mutate(
    country_name = case_when(country_name == "Ivory Coast" ~ "Côte d'Ivoire", TRUE ~ country_name),
    v2x_regime = case_when(
      v2x_regime == 0 ~ "Closed autocracy",
      v2x_regime == 1 ~ "Electoral autocracy",
      v2x_regime == 2 ~ "Electoral democracy",
      v2x_regime == 3 ~ "Liberal democracy"
    ),
    v2elpeace = v2elpeace * -1,
    v2elintim = v2elintim * -1,
    color = case_when(
      country_name == "Côte d'Ivoire" ~ "Côte d'Ivoire",
      country_name == "Kenya" ~ "Kenya",
      TRUE ~ "Other"
    ),
    size = case_when(
      country_name %in% c("Côte d'Ivoire", "Kenya") ~ 7,
      TRUE ~ 0.4
    ),
    year = case_when(
      country_name %in% c("Côte d'Ivoire", "Kenya") ~ year,
      TRUE ~ NA
    ),
    order = case_when(
      country_name == "Côte d'Ivoire" ~ 2,
      country_name == "Kenya" ~ 2,
      TRUE ~ 1
    ),
    executive_constraints = v2x_jucon * v2xlg_legcon,
    # na.rm = TRUE: without it, one missing value in a component would make
    # its mean and sd NA and blank the component (and party_strength) for
    # every row. Rows that are themselves missing a component stay NA.
    across(
      c(v2psorgs, v2psprbrch, v2psprlnks, v2pscnslnl, v2pscohesv),
      ~ (.x - mean(.x, na.rm = TRUE)) / sd(.x, na.rm = TRUE)
    ),
    party_strength = (v2psorgs + v2psprbrch + v2psprlnks + v2pscnslnl + v2pscohesv) / 5
  ) %>%
  arrange(order)

# Set boxplot theme

# Theme for the Appendix A boxplots: black-and-white base, extra-large type,
# legend anchored top-left above the panel, no grid lines.
box_theme <- theme_bw() +
  theme(
    # Page margin and panel grid
    plot.margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "cm"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes
    axis.title.x = element_text(size = 24, margin = margin(10, 0, 0, 0)),
    axis.title.y = element_text(size = 24, margin = margin(0, 10, 0, 0)),
    axis.text.x = element_text(size = 22),
    axis.text.y = element_text(size = 22),
    # Legend
    legend.position = "top",
    legend.justification = c(0, 1),
    legend.box.just = "left",
    legend.margin = margin(0, 0, 0, 0),
    legend.title = element_text(size = 22),
    legend.text = element_text(size = 22)
  )

##### Figure A1: Boxplot of electoral violence intensity by regime type ####

# Draw one Figure A1 panel and save it as a PDF: a boxplot of `y_var` by
# regime type with the individual country-years jittered on top, the two case
# countries highlighted through the `color` and `size` columns of `data`.
#
# data:    prepared V-Dem data (needs v2x_regime, color, size and `y_var`)
# y_var:   name of the column to plot, as a string
# y_label: y-axis title
# file:    output file name
# y_scale: optional y scale (e.g. scale_y_continuous(...)); NULL adds nothing
#
# Returns the plot, so a top-level call still displays it as before.
plot_vdem_boxplot <- function(data, y_var, y_label, file, y_scale = NULL) {
  regime_labels <- c(
    "Liberal democracy" = "Liberal\ndemocracy",
    "Electoral democracy" = "Electoral\ndemocracy",
    "Electoral autocracy" = "Electoral\nautocracy",
    "Closed autocracy" = "Closed\nautocracy"
  )

  panel <- ggplot(data) +
    geom_boxplot(
      aes(y = .data[[y_var]], x = v2x_regime),
      fill = "lightgrey"
    ) +
    geom_jitter(
      aes(y = .data[[y_var]], x = v2x_regime, color = color, size = size),
      alpha = 0.9
    ) +
    scale_color_manual(
      values = c("Côte d'Ivoire" = "#3A9AB2", "Kenya" = "#F11B00", "Other" = "black"),
      name = NULL,
      guide = guide_legend(
        override.aes = list(size = 7)
      )
    ) +
    # Point sizes are taken literally from the `size` column
    scale_size_identity() +
    scale_x_discrete(labels = regime_labels) +
    y_scale +
    labs(y = y_label, x = NULL) +
    box_theme

  # Pass the plot explicitly instead of relying on the last plot created
  ggsave(file, plot = panel, width = 13, height = 8.9, dpi = 600)

  panel
}

plot_vdem_boxplot(
  vdem,
  y_var = "v2elpeace",
  y_label = "Estimated intensity of electoral violence",
  file = "figure_a1_vdem_violence.pdf",
  y_scale = scale_y_continuous(breaks = seq(-2.5, 7.5, 2.5), limits = c(-2.5, 5))
)

##### Figure A1: Boxplot of government electoral intimidation intensity by regime type ####

plot_vdem_boxplot(
  vdem,
  y_var = "v2elintim",
  y_label = "Estimated intensity of government electoral intimidation",
  file = "figure_a1_vdem_repression.pdf"
)

##### Figure A1: Boxplot of clientelism by regime type ####

plot_vdem_boxplot(
  vdem,
  y_var = "v2xnp_client",
  y_label = "Estimated degree of political clientelism",
  file = "figure_a1_vdem_clientelism.pdf",
  y_scale = scale_y_continuous(breaks = seq(-0, 1, 0.25), limits = c(0, 1))
)

##### Figure A1: Boxplot of executive constraints by regime type ####

plot_vdem_boxplot(
  vdem,
  y_var = "executive_constraints",
  y_label = "Estimated degree of executive constraints",
  file = "figure_a1_vdem_constraints.pdf",
  y_scale = scale_y_continuous(breaks = seq(-0, 1, 0.25), limits = c(0, 1))
)

##### Figure A1: Boxplot of party strength by regime type ####

plot_vdem_boxplot(
  vdem,
  y_var = "party_strength",
  y_label = "Estimated degree of political party strength",
  file = "figure_a1_vdem_party_strength.pdf"
)

##### Figure A1: Boxplot of political polarization by regime type ####

plot_vdem_boxplot(
  vdem,
  y_var = "v2cacamps",
  y_label = "Estimated degree of political polarization",
  file = "figure_a1_vdem_polarization.pdf"
)

#### Appendix B: Comparison with other datasets ####

# Prepare MAVERICK most-representative

# Monthly MAVERICK series per country: events are assigned to the first day
# of the month of `date_start`, deaths and event counts are totalled per
# country-month, and every month from January 1995 to January 2022 without
# events is added with zeros.
maverick_rep_month <- maverick_rep %>%
  mutate(
    year_month = floor_date(
      as.Date(date_start, format = "%Y-%m-%d"),
      unit = "month"
    )
  ) %>%
  group_by(country, year_month) %>%
  summarize(deaths_best = sum(deaths_best, na.rm = TRUE), events = n()) %>%
  ungroup() %>%
  complete(
    country,
    year_month = seq(
      from = as.Date("1995-01-01"),
      to = as.Date("2022-01-01"),
      by = "month"
    ),
    fill = list(deaths_best = 0, events = 0)
  ) %>%
  mutate(dataset = "MAVERICK")

# Prepare DECO

# Monthly DECO series for the two case countries. `date_start` is parsed as
# month-day-year; months from January 1989 on are kept and totalled, missing
# months between January 1988 and January 2023 are added with zeros, and
# everything after December 2017 is then blanked to NA.
# NOTE(review): the zero-filled range starts in 1988 although rows before
# 1989 are filtered out, so the 1988 months are zeros rather than NA —
# confirm this is intended (the figures only show 1991 onward).
deco_month <- deco %>%
  filter(country %in% c("Kenya", "Ivory Coast")) %>%
  mutate(
    country = if_else(country == "Ivory Coast", "Côte d'Ivoire", country),
    year_month = floor_date(mdy(date_start), unit = "month")
  ) %>%
  rename(deaths_best = best) %>%
  filter(year_month > as.Date("1988-12-01")) %>%
  group_by(country, year_month) %>%
  summarize(deaths_best = sum(deaths_best, na.rm = TRUE), events = n()) %>%
  ungroup() %>%
  complete(
    country,
    year_month = seq(
      from = as.Date("1988-01-01"),
      to = as.Date("2023-01-01"),
      by = "month"
    ),
    fill = list(deaths_best = 0, events = 0)
  ) %>%
  mutate(
    events = if_else(year_month > as.Date("2017-12-01"), NA_real_, events),
    deaths_best = if_else(year_month > as.Date("2017-12-01"), NA_real_, deaths_best),
    dataset = "DECO"
  )

# Prepare ECAV

# Monthly ECAV series for the two case countries, restricted to rows with
# EventViolence equal to 1. Months from January 1995 on are kept and counted,
# missing months up to January 2023 are added with zero events, and months
# after December 2012 are then blanked to NA. ECAV contributes no death
# counts, so `deaths_best` is NA throughout.
# NOTE(review): ECAV is read with every column as text, so `Date` must
# already be stored as "YYYY-MM-DD" strings for this parse to succeed —
# confirm against the file.
ecav_month <- ecav %>%
  filter(country %in% c("Kenya", "Ivory Coast"), EventViolence == 1) %>%
  mutate(
    country = if_else(country == "Ivory Coast", "Côte d'Ivoire", country),
    year_month = floor_date(
      as.Date(Date, format = "%Y-%m-%d"),
      unit = "month"
    )
  ) %>%
  filter(year_month > as.Date("1994-12-01")) %>%
  group_by(country, year_month) %>%
  summarize(events = n()) %>%
  ungroup() %>%
  complete(
    country,
    year_month = seq(
      from = as.Date("1995-01-01"),
      to = as.Date("2023-01-01"),
      by = "month"
    ),
    fill = list(events = 0)
  ) %>%
  mutate(
    events = if_else(year_month > as.Date("2012-12-01"), NA_real_, events),
    deaths_best = NA_real_,
    dataset = "ECAV"
  )

# Merge dataframes

# Stack the three monthly series; rbind() lines the columns up by name, so
# the different column order of the ECAV table does not matter.
df <- rbind(maverick_rep_month, deco_month, ecav_month)

##### Figure B1: Number of events by month, dataset, and country ####

# Monthly event counts as filled areas, one facet per dataset (rows) and
# country (columns). Fill and outline share one three-color palette, and both
# legends are switched off because the facets already name the dataset.
df %>%
  filter(
    dataset %in% c("DECO", "ECAV", "MAVERICK"),
    year_month < as.Date("2023-01-01")
  ) %>%
  ggplot(aes(x = year_month, y = events)) +
  geom_area(
    aes(color = dataset, fill = dataset),
    position = position_dodge(0.8)
  ) +
  facet_grid(dataset ~ country) +
  scale_x_date(
    date_breaks = "3 year",
    date_labels = "%Y",
    limits = c(as.Date("1991-01-01"), as.Date("2023-01-01"))
  ) +
  scale_y_continuous(breaks = seq(0,300,50)) +
  scale_fill_manual(
    values = wes_palette("Zissou1", 3, type = "continuous"),
    aesthetics = c("colour", "fill")
  ) +
  guides(color = "none", fill = "none") +
  labs(
    x = NULL,
    y = "Number of electoral violence events",
    caption = "Note: Based on the most-representative aggregation set."
  ) +
  my_theme +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position.inside = c(0.05, 0.95),
    legend.justification = c(0, 1),
    legend.box.just = "left",
    legend.background = element_rect(fill = "transparent"),
    legend.margin = margin(0, 0, 0, 0)
  )

# Saves the most recently created plot
ggsave(filename = "figure_b1.pdf", width = 13, height = 11, dpi = 600)

##### Figure B2: Number of deaths by month, dataset, and country ####

# Monthly death counts for DECO and MAVERICK as filled areas, one facet per
# dataset (rows) and country (columns). Both legends are switched off because
# the facets already name the dataset.
df %>%
  filter(dataset %in% c("DECO", "MAVERICK")) %>%
  filter(year_month < "2023-01-01") %>%
  ggplot() +
  geom_area(aes(x = year_month, y = deaths_best, color = dataset, fill = dataset), position = position_dodge(0.8)) +
  labs(
    x = NULL,
    y = "Number of electoral violence deaths",
    caption = "Note: Based on the most-representative aggregation set."
  ) +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y", limits = c(as.Date("1991-01-01"), as.Date("2023-01-01"))) +
  scale_y_continuous(breaks = seq(0,2000,200)) +
  scale_fill_manual(values = wes_palette("Zissou1", 2, type = "continuous")) +
  scale_color_manual(values = wes_palette("Zissou1", 2, type = "continuous")) +
  my_theme +
  facet_grid(dataset ~ country) +
  guides(color = "none", fill = "none") +
  # The numeric `legend.position` that used to sit here is deprecated since
  # ggplot2 3.5.0 and conflicted with `legend.position.inside` below; it is
  # removed to match the events-by-month figure. No legend is drawn, so the
  # output is unchanged.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position.inside = c(0.05, 0.95),
    legend.justification = c(0, 1),
    legend.box.just = "left",
    legend.background = element_rect(fill = "transparent"),
    legend.margin = margin(0, 0, 0, 0)
  )

# Saves the most recently created plot
ggsave("figure_b2.pdf", width = 13, height = 11, dpi = 600)

##### Figure B3: Number of deaths by location and dataset ####

# Jitter points

# DECO deaths per coordinate pair for each case country, jittered inside the
# country polygon and tagged with the dataset name. `longitude` is converted
# to numeric first. For Côte d'Ivoire, rows at latitude 8.00 are left out.
deco_sf_civ <- deco %>%
  mutate(longitude = as.numeric(longitude)) %>%
  filter(
    country == "Ivory Coast",
    !is.na(latitude),
    !is.na(longitude),
    latitude != 8.00
  ) %>%
  group_by(latitude, longitude) %>%
  summarize(deaths_best = sum(best, na.rm = TRUE), .groups = "drop") %>%
  jitter_within(
    lat_col = latitude,
    lon_col = longitude,
    polygon = shapefile_civ,
    amount = 0.1
  ) %>%
  mutate(dataset = "DECO")

deco_sf_ke <- deco %>%
  mutate(longitude = as.numeric(longitude)) %>%
  filter(
    country == "Kenya",
    !is.na(latitude),
    !is.na(longitude)
  ) %>%
  group_by(latitude, longitude) %>%
  summarize(deaths_best = sum(best, na.rm = TRUE), .groups = "drop") %>%
  jitter_within(
    lat_col = latitude,
    lon_col = longitude,
    polygon = shapefile_ke,
    amount = 0.1
  ) %>%
  mutate(dataset = "DECO")

# Make Côte d'Ivoire map

# Draw one map comparing death counts by location across the two datasets
# and save it as a PDF.
#
# maverick_points, deco_points: data frames with longitude, latitude,
#   deaths_best and dataset; they are stacked before plotting
# shapefile: sf object drawn as the base map
# xlim, ylim: longitude/latitude window passed to coord_sf()
# file:      output file name
#
# Returns the plot, so a top-level call still displays it as before.
# The color legend is configured once in guides(); the scale-level guide the
# two copies of this code used to carry was overridden by it and is dropped.
plot_deaths_comparison_map <- function(maverick_points, deco_points, shapefile, xlim, ylim, file) {
  comparison_map <- maverick_points %>%
    bind_rows(deco_points) %>%
    ggplot() +
    geom_sf(data = shapefile, linewidth = 0.15, fill = "grey97") +
    coord_sf(xlim = xlim, ylim = ylim, expand = FALSE) +
    geom_point(
      aes(x = longitude, y = latitude, size = deaths_best, color = dataset),
      shape = 19, alpha = 0.7
    ) +
    scale_color_manual(
      values = wesanderson::wes_palette("Zissou1", 2, type = "continuous")
    ) +
    scale_size(
      name = "Number of deaths",
      range = c(1, 15),
      breaks = c(100, 200, 300, 400)
    ) +
    theme_void() +
    labs(
      color = "Dataset",
      caption = "Note: Based on the most-representative aggregation set."
    ) +
    guides(
      color = guide_legend(order = 1, override.aes = list(size = 7)),
      size = guide_bins(order = 2)
    ) +
    theme(
      legend.spacing = unit(2, "lines"),
      plot.caption = element_text(size = 18, hjust = 1, margin = margin(t = 0, r = 0, b = 0, l = 0)),
      legend.title = element_text(size = 18),
      legend.text = element_text(size = 18)
    )

  # Pass the plot explicitly instead of relying on the last plot created
  ggsave(file, plot = comparison_map, width = 13, height = 11, dpi = 600)

  comparison_map
}

# Make Côte d'Ivoire map

plot_deaths_comparison_map(
  maverick_civ, deco_sf_civ, shapefile_civ,
  xlim = c(-9, -2), ylim = c(4, 11),
  file = "figure_b3_map_civ_deco.pdf"
)

# Make Kenya map

plot_deaths_comparison_map(
  maverick_ke, deco_sf_ke, shapefile_ke,
  xlim = c(33, 43), ylim = c(-5, 6),
  file = "figure_b3_map_ken_deco.pdf"
)

#### Appendix C: Additional descriptive statistics ####

##### Figure C1: Number of event reports by inferential clue ####

# Sum each of the columns certain1-certain6 over all rows of maverick_rep and
# show the six totals as a horizontal lollipop chart
# (presumably the columns are 0/1 indicators, so the sum is a count -- confirm)

maverick_rep %>%
  select(event_id, certain1:certain6) %>%
  pivot_longer(
    cols = -event_id,
    names_to = "criteria",
    values_to = "selected"
  ) %>%
  group_by(criteria) %>%
  # Named n_reports rather than `sum` to avoid shadowing base::sum()
  summarize(n_reports = sum(selected), .groups = "drop") %>%
  mutate(
    # Replace the column names with the axis labels shown in the figure
    criteria = case_when(
      criteria == "certain1" ~ "The report/another report identified\nthe event as election-related",
      criteria == "certain2" ~ "At least one of the actors reportedly\n had ties to a political party",
      criteria == "certain3" ~ "At least one of the targets was\n election-related",
      criteria == "certain4" ~ "The reported purpose was to influence\n an electoral process or outcome",
      criteria == "certain5" ~ "The event was part of an episode\n of electoral violence",
      criteria == "certain6" ~ "The event occurred at most 6 months\n prior to or after an election"
    )
  ) %>%
  ggplot(aes(x = criteria)) +
  geom_segment(aes(xend = criteria, y = 0, yend = n_reports)) +
  geom_point(aes(y = n_reports), size = 3) +
  # Labels sit 300 units beyond the point; with the upper axis limit of 2700
  # a total above 2400 would push its label outside the axis range
  geom_text(
    aes(y = n_reports + 300, label = paste0(n_reports, " event reports")),
    size = 4
  ) +
  scale_y_continuous(limits = c(0, 2700), breaks = seq(0, 3000, 500)) +
  coord_flip() +
  labs(
    y = "Number of event reports",
    x = NULL
  ) +
  my_theme

ggsave("figure_c1.pdf", width = 13, height = 8.9, dpi = 600)

##### Figure C2: Number of event reports by source type ####

# Share of rows in maverick_event_report per source_type, shown as a
# horizontal lollipop chart with rounded percentage labels

maverick_event_report %>%
  count(source_type, name = "count") %>%
  # count() returns ungrouped data, so the share is taken over all rows
  mutate(proportion = count / sum(count)) %>%
  ggplot(aes(x = source_type)) +
  geom_segment(aes(xend = source_type, y = 0, yend = proportion)) +
  geom_point(aes(y = proportion), size = 3) +
  # Labels sit 0.02 beyond the point; the upper axis limit is 0.69
  geom_text(
    aes(
      y = proportion + 0.02,
      label = paste0(round(proportion * 100, 0), "%")
    ),
    size = 4
  ) +
  coord_flip() +
  scale_y_continuous(
    labels = scales::percent_format(),
    breaks = seq(0, 0.7, 0.2),
    limits = c(0, 0.69)
  ) +
  labs(
    x = NULL,
    y = "Share of event reports (%)"
  ) +
  my_theme

ggsave("figure_c2.pdf", width = 13, height = 7, dpi = 600)

##### Table C1: Ten most frequently involved actors by country ####

# Build a side-by-side ranking of the ten most frequent actor names per
# country: stack the six actor columns, count non-missing names per country,
# keep the top ten, and spread the two countries into parallel columns

top_actors_by_country <- maverick_rep %>%
  select(event_id, country, all_of(paste0("actor", 1:6))) %>%
  pivot_longer(
    cols = -c(event_id, country),
    names_to = "actor",
    values_to = "actor_name",
    values_drop_na = TRUE
  ) %>%
  count(actor_name, country, name = "n") %>%
  group_by(country) %>%
  # with_ties = FALSE caps each country at exactly ten rows
  slice_max(order_by = n, n = 10, with_ties = FALSE) %>%
  arrange(country, desc(n)) %>%
  # Still grouped by country here, so rank restarts at 1 within each country
  mutate(rank = row_number()) %>%
  pivot_wider(
    names_from = country,
    values_from = c(actor_name, n),
    names_sep = "_"
  ) %>%
  arrange(rank) %>%
  select(
    `actor_name_Côte d'Ivoire`,
    `n_Côte d'Ivoire`,
    actor_name_Kenya,
    n_Kenya
  )

# Export the ranking as a LaTeX table

top_actors_by_country %>%
  kable(
    format = "latex",
    booktabs = TRUE,
    col.names = c(
      "Actor name (Côte d'Ivoire)", "Number of events",
      "Actor name (Kenya)", "Number of events"
    ),
    caption = "Ten most frequently involved actors by country (most-representative aggregation set)",
    label = "actors",
    position = "t",
    align = "lclc"
  ) %>%
  kable_styling(latex_options = "scale_down") %>%
  save_kable(file = "table_c1.tex")

##### Figure C3: Number and share of electoral violence events by event context and country ####

# Count events per event context and country, convert the counts to
# within-country shares, and draw one lollipop panel per country

maverick_rep %>%
  mutate(
    # Missing, unknown, other and indeterminate contexts share one category
    event_context = case_when(
      is.na(event_context) |
        event_context %in% c("Unknown", "Other context", "Indeterminate") ~
        "Other/Unknown",
      TRUE ~ event_context
    )
  ) %>%
  count(event_context, country, name = "count") %>%
  group_by(country) %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup() %>%
  ggplot(aes(x = event_context)) +
  geom_segment(aes(xend = event_context, y = 0, yend = proportion)) +
  geom_point(aes(y = proportion), size = 3) +
  # Position follows the share, while the label text shows the raw count
  geom_text(
    aes(y = proportion + 0.08, label = paste0(count, " events")),
    size = 4,
    position = position_dodge(width = 0.9)
  ) +
  coord_flip() +
  facet_wrap(~ country) +
  scale_y_continuous(
    labels = scales::percent_format(),
    breaks = seq(0, 0.7, 0.2),
    limits = c(0, 0.69)
  ) +
  labs(
    x = NULL,
    y = "Share of events (%)",
    caption = "Note: Based on the most-representative aggregation set."
  ) +
  my_theme

ggsave(filename = "figure_c3.pdf", width = 13, height = 7, dpi = 600)

##### Figure C4: Number of electoral violence events by days to closest election ####

# Election dates lookup table: one row per election date, with the country it
# belongs to (14 dates for Côte d'Ivoire followed by 8 for Kenya)

election_dates <- tibble(
  # The counts must match the number of dates listed per country below
  country = rep(c("Côte d'Ivoire", "Kenya"), times = c(14, 8)),
  date = ymd(
    c(
      # Côte d'Ivoire
      "1995-10-22", # Ivorian presidential election 1995
      "1995-11-26", # Ivorian legislative election 1995
      "2000-10-22", # Ivorian presidential election 2000
      "2000-12-10", # Ivorian legislative election 2000
      "2001-03-25", # Ivorian municipal election 2001
      "2002-07-07", # Ivorian local election 2002
      "2010-10-31", # Ivorian presidential election 2010
      "2010-11-28", # Ivorian presidential election 2010 (presumably the second round; confirm)
      "2011-12-11", # Ivorian legislative election 2011
      "2013-04-21", # Ivorian municipal election 2013
      "2015-10-25", # Ivorian presidential election 2015
      "2018-10-13", # Ivorian municipal and senatorial election 2018
      "2020-10-31", # Ivorian presidential election 2020
      "2021-03-06", # Ivorian legislative election 2021
      # Kenya
      "1992-12-29", # Kenyan general election 1992
      "1997-12-29", # Kenyan general election 1997
      "2002-12-27", # Kenyan general election 2002
      "2007-12-27", # Kenyan general election 2007
      "2013-03-04", # Kenyan general election 2013
      "2017-08-08", # Kenyan general election 2017
      "2017-10-26", # Kenyan general election 2017 (rerun)
      "2022-08-09" # Kenyan general election 2022
    )
  )
)

# Define a function to find the days difference to the closest election date
#
# Arguments:
#   event_date      A single date (anything difftime() accepts).
#   election_dates  A data frame with a `date` column of candidate elections.
# Returns:
#   One number: the signed difference in days (event minus election) to the
#   election with the smallest absolute difference. Negative values mean the
#   event precedes that election. Returns NA_real_ when no difference can be
#   computed (missing event date, or no usable election dates), so the result
#   always has length one and is safe inside a row-wise mutate().

closest_days_to_election <- function(event_date, election_dates) {
  election_diffs <- as.numeric(
    difftime(event_date, election_dates$date, units = "days")
  )

  # which.min() returns integer(0) when every difference is NA or there are
  # none at all; indexing with that would yield a zero-length result
  if (length(election_diffs) == 0L || all(is.na(election_diffs))) {
    return(NA_real_)
  }

  election_diffs[which.min(abs(election_diffs))]
}

# For every event, compute the signed number of days between its start date
# and the closest election held in the same country (negative = before the
# election), then plot the distribution per country

maverick_rep %>%
  mutate(date_start = as.Date(date_start)) %>%
  rowwise() %>%
  mutate(
    # Only elections from the event's own country are candidates; passing the
    # full table would let an event in one country be matched to an election
    # in the other. This relies on `country` using the same labels as
    # election_dates$country.
    event_timing_start = closest_days_to_election(
      date_start,
      election_dates[election_dates$country == country, ]
    )
  ) %>%
  ungroup() %>%
  ggplot() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_bar(aes(x = event_timing_start), color = "#F11B00", alpha = 0.5) +
  annotate(
    "label",
    x = 0,
    y = 160,
    label = "Election day",
    size = 5,
    lineheight = 1,
    hjust = "center"
  ) +
  # Events more than 800 days from their closest election fall outside the
  # axis limits and are not drawn
  scale_x_continuous(breaks = seq(-1000, 1000, 100), limits = c(-800, 800)) +
  facet_wrap(~ country, nrow = 2) +
  labs(
    x = "Days to closest election",
    y = "Number of events",
    caption = "Note: Based on the most-representative aggregation set."
  ) +
  my_theme

ggsave("figure_c4.pdf", width = 13, height = 10, dpi = 600)

##### Figure C5: Number of deaths and injured people by election ####

# Total deaths_best and injuries_best per election, drawn as horizontal
# side-by-side bars

maverick_rep %>%
  mutate(
    # Empty and indeterminate election labels are shown as "Unknown"
    election = if_else(
      election %in% c("", "Indeterminate"),
      "Unknown",
      election
    )
  ) %>%
  group_by(election) %>%
  summarize(
    Deaths = sum(deaths_best),
    Injuries = sum(injuries_best),
    .groups = "drop"
  ) %>%
  # Long format: one row per election and outcome type
  pivot_longer(
    cols = c(Deaths, Injuries),
    names_to = "type",
    values_to = "count"
  ) %>%
  ggplot(aes(x = election, y = count, fill = type)) +
  geom_col(position = position_dodge(width = 1)) +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 2000, 200)) +
  scale_fill_manual(
    name = NULL,
    values = rev(wes_palette("Zissou1", 2, type = "continuous"))
  ) +
  labs(
    x = NULL,
    y = "Number of deaths/injured people",
    caption = "Note: Based on the most-representative aggregation set."
  ) +
  my_theme +
  # Legend goes above the panel, aligned to the left, with no extra margin
  theme(
    legend.position = "top",
    legend.justification = c(0, 1),
    legend.box.just = "left",
    legend.margin = margin(0, 0, 0, 0)
  )

ggsave(filename = "figure_c5.pdf", width = 13, height = 13, dpi = 600)